## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
This file is based on the “DataCleaning_Seine_2019” R Markdown script. Extracted relevant salmon abundances (with zeros where we seined but caught no salmon), environmental information (from YSI meter), habitat information (i.e. shoot/flowering shoot density/m2), and otter density by site.
# Take a look at data
glimpse(dat)
## Observations: 42
## Variables: 30
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...
## $ site <fct> Salt Lake Bay, Nossuk Bay, Chus...
## $ n_surv1 <int> 90, 28, 60, 104, 21, 108, 65, 1...
## $ n_surv2 <int> 48, 50, 50, 82, 31, 132, 27, 5,...
## $ dens_surv1 <dbl> 9.9447514, 4.3495146, 5.4533061...
## $ dens_surv2 <dbl> 5.30386740, 7.76699029, 4.54442...
## $ avg_density <dbl> 7.62430939, 6.05825243, 4.99886...
## $ sd_density <dbl> 3.28160053, 2.41652026, 0.64267...
## $ year <int> 2017, 2017, 2017, 2017, 2017, 2...
## $ dissolved_02_mg.l_surface <dbl> 12.80, 10.58, 8.68, 9.55, 11.62...
## $ dissolved_02_percent_surface <dbl> NA, 115.6, 98.8, 104.5, 135.3, ...
## $ specific_conductivity_surface <dbl> NA, 47004.0, 4549.4, 44498.0, 4...
## $ salinity_ppt_surface <dbl> 6.0, 30.4, 29.4, 28.9, 29.0, 31...
## $ temperature_C_surface <dbl> 8.0, 10.9, 12.9, 11.7, 14.1, 12...
## $ dissolved_02_mg.l_transect <dbl> 13.30, 10.72, 8.95, 10.03, 10.7...
## $ dissolved_02_percent_transect <dbl> NA, 116.60, 101.40, 108.20, 124...
## $ specific_conductivity_transect <int> NA, 47741, 45527, 47365, 45655,...
## $ salinity_ppt_transect <dbl> 28.0, 30.9, 29.5, 30.6, 29.7, 3...
## $ temperature_C_transect <dbl> 8.0, 10.3, 12.8, 11.0, 13.7, 11...
## $ avg_shoot <dbl> 226.5, 245.0, 220.5, 180.5, 205...
## $ sd_shoot <dbl> 40.22437, 57.55991, 71.85501, 4...
## $ avg_flowering <dbl> 0.0, 0.0, 6.0, 0.0, 2.5, 2.0, 1...
## $ sd_flowering <dbl> 0.000000, 0.000000, 3.703280, 0...
## $ date <fct> 2017-05-12, 2017-05-15, 2017-06...
## $ juli_date <int> 132, 135, 164, 189, 220, 221, 2...
## $ SALCHIN <int> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ SALCHUM <int> 46, 75, 1, 0, 0, 1, 0, 56, 0, 6...
## $ SALCOHO <int> 0, 4, 0, 0, 0, 1, 0, 6, 0, 0, 0...
## $ SALPINK <int> 10, 8, 0, 0, 0, 0, 0, 2, 0, 2, ...
## $ SALSOCK <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
# do some minor clean up
dat <- dat[-42,-1]
dat$year <- as.factor(dat$year)
dat[,25:29] <- data.frame(lapply(dat[,25:29], as.numeric), stringsAsFactors = FALSE)
dat <- dat %>%
rowwise() %>%
mutate(abundance = sum(SALCHIN, SALCHUM, SALCOHO, SALPINK, SALSOCK))
# visualize relationships among variables: scatterplot matricies
corrgram(dat[,c(4,5,8,9,12:14,17:19,21,25:29)], lower.panel=panel.shade, upper.panel=panel.ellipse,
diag.panel=panel.density)
pairs(dat[,c(5,6,9,10,13:15,18:20,22,25:30)], lower.panel = panel.smooth)
# graph abundance data
dat$site <- reorder(dat$site, -dat$avg_density)
dat %>%
group_by(site, avg_density, year) %>%
summarise(abundance = as.numeric(sum(SALCHIN, SALCHUM, SALCOHO, SALPINK, SALSOCK))) %>%
ggplot() + geom_bar(aes(x = site, y = abundance, fill = year), stat = "identity", position = "dodge") + theme(axis.text.x = element_text(angle = 90))
## Warning: Grouping rowwise data frame strips rowwise nature
# look at the sites against the average sea otter density
ggplot(data = dat) + geom_bar(aes(x = site, y = avg_density, fill = year), stat = "identity") + theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 1 rows containing missing values (position_stack).
# visualize distribution of salmon abundance
hist(x = dat$abundance)
boxplot(dat$abundance)
# distribution of abundances
# Can see that there are a lot of zeros and a few low, some mid, and only a few high abundances
plot(table(dat$abundance))
hist(as.numeric(dat$abundance > 0))
dat$year <- as.factor(dat$year)
summary(dat)
## site n_surv1 n_surv2 dens_surv1
## Kaguk Cove : 2 Min. : 0.00 Min. : 0.0 Min. : 0.000
## Guktu Bay : 2 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 0.113
## Salt Lake Bay: 2 Median : 18.00 Median : 14.0 Median : 1.221
## Chusini Cove : 2 Mean : 30.22 Mean : 36.4 Mean : 3.127
## Shinaku Inlet: 2 3rd Qu.: 60.00 3rd Qu.: 48.0 3rd Qu.: 5.453
## Nossuk Bay : 2 Max. :108.00 Max. :278.0 Max. :15.465
## (Other) :29 NA's :1
## dens_surv2 avg_density sd_density year
## Min. : 0.0000 Min. : 0.00000 Min. : 0.00000 2017:21
## 1st Qu.: 0.1011 1st Qu.: 0.09921 1st Qu.: 0.07406 2019:20
## Median : 0.6984 Median : 0.95503 Median : 0.19105
## Mean : 4.1787 Mean : 3.62229 Mean : 1.62919
## 3rd Qu.: 5.3163 3rd Qu.: 4.90825 3rd Qu.: 1.95997
## Max. :39.8513 Max. :26.84015 Max. :18.40055
## NA's :1 NA's :1 NA's :1
## dissolved_02_mg.l_surface dissolved_02_percent_surface
## Min. : 8.240 Min. : 90.7
## 1st Qu.: 9.578 1st Qu.:104.2
## Median :10.185 Median :112.2
## Mean :10.400 Mean :114.6
## 3rd Qu.:10.950 3rd Qu.:121.2
## Max. :13.030 Max. :146.7
## NA's :1 NA's :3
## specific_conductivity_surface salinity_ppt_surface temperature_C_surface
## Min. : 4549 Min. : 3.80 Min. : 8.00
## 1st Qu.:44876 1st Qu.:28.98 1st Qu.:11.30
## Median :47208 Median :30.40 Median :12.45
## Mean :43771 Mean :27.80 Mean :12.11
## 3rd Qu.:48976 3rd Qu.:31.60 3rd Qu.:12.82
## Max. :64325 Max. :32.60 Max. :14.70
## NA's :3 NA's :1 NA's :1
## dissolved_02_mg.l_transect dissolved_02_percent_transect
## Min. : 6.520 Min. : 9.87
## 1st Qu.: 9.453 1st Qu.:102.33
## Median : 10.085 Median :112.35
## Mean : 12.577 Mean :108.23
## 3rd Qu.: 11.000 3rd Qu.:119.47
## Max. :105.000 Max. :150.10
## NA's :1 NA's :3
## specific_conductivity_transect salinity_ppt_transect
## Min. :38426 Min. :24.30
## 1st Qu.:46075 1st Qu.:29.77
## Median :47603 Median :30.80
## Mean :47332 Mean :30.62
## 3rd Qu.:49095 3rd Qu.:31.73
## Max. :50436 Max. :32.90
## NA's :3 NA's :1
## temperature_C_transect avg_shoot sd_shoot avg_flowering
## Min. : 8.00 Min. : 93.0 Min. : 29.45 Min. :0.000
## 1st Qu.:11.10 1st Qu.: 220.5 1st Qu.: 48.73 1st Qu.:0.000
## Median :12.25 Median : 283.0 Median : 69.69 Median :0.500
## Mean :11.97 Mean : 337.3 Mean : 82.85 Mean :1.522
## 3rd Qu.:12.72 3rd Qu.: 369.0 3rd Qu.: 87.29 3rd Qu.:2.500
## Max. :16.60 Max. :1291.2 Max. :287.32 Max. :8.000
## NA's :1
## sd_flowering date juli_date SALCHIN
## Min. :0.000 2017-05-10: 1 Min. :109.0 Min. :0.00000
## 1st Qu.:0.000 2017-05-11: 1 1st Qu.:131.0 1st Qu.:0.00000
## Median :1.414 2017-05-12: 1 Median :142.0 Median :0.00000
## Mean :2.059 2017-05-13: 1 Mean :159.2 Mean :0.04878
## 3rd Qu.:3.578 2017-05-14: 1 3rd Qu.:169.0 3rd Qu.:0.00000
## Max. :8.485 2017-05-15: 1 Max. :238.0 Max. :2.00000
## (Other) :35
## SALCHUM SALCOHO SALPINK SALSOCK
## Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.0000
## Median : 2.00 Median : 0.000 Median : 0.00 Median : 0.0000
## Mean : 15.02 Mean : 2.537 Mean : 12.95 Mean : 0.7317
## 3rd Qu.: 15.00 3rd Qu.: 1.000 3rd Qu.: 2.00 3rd Qu.: 0.0000
## Max. :128.00 Max. :27.000 Max. :287.00 Max. :27.0000
##
## abundance
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 4.00
## Mean : 31.29
## 3rd Qu.: 31.00
## Max. :345.00
##
plot((abundance > 0) ~ avg_density, data = dat)
plot((abundance >0) ~ avg_shoot, data = dat)
plot((abundance >0) ~ avg_flowering, data = dat)
plot((abundance >0) ~ juli_date, data = dat)
# hard to see any real patterns of presence of salmon with otter density or shoot density, flowering density
# or julian date
table((dat$abundance >0), dat$year)
##
## 2017 2019
## FALSE 10 3
## TRUE 11 17
# there are more sites with salmon than without in 2019, likely due to the time of sampling
# makes sense that 2017 has about half and half due to the even spread of sampling across the summer.
table((dat$abundance >0), dat$juli_date)
##
## 109 113 114 125 126 127 128 129 130 131 132 133 134 135 137 138
## FALSE 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## TRUE 0 0 1 1 1 1 1 1 2 1 1 0 1 1 1 1
##
## 139 140 141 142 157 158 159 161 162 163 164 166 167 169 188 189
## FALSE 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1
## TRUE 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0
##
## 218 219 220 221 222 223 231 238
## FALSE 1 1 1 0 1 1 1 1
## TRUE 0 0 0 1 0 0 0 0
# appears that there are more "false" i.e. no salmon later in the summer when compared to earlier in the summer, once again makes sense based on what we know about out migration of salmon.
plot(abundance ~ year, data = dat)
dat$abundance[dat$abundance > 0] <- 1
# convert to 0, 1 for modelling
plot(dat$juli_date, dat$abundance)
ggplot(dat, aes(avg_density, abundance, color = year)) + geom_jitter(width = 0.5, height = 0.05) + facet_wrap(~year, ncol = 1)
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = dat, aes(x = reorder(site, -avg_density), y = avg_density, fill = year)) + geom_bar(stat = "identity") + theme(axis.text.x = element_text(angle = 90)) +facet_wrap(~year, ncol = 1)
## Warning: Removed 1 rows containing missing values (position_stack).
# finally lets look at a corrigram of the response variables
library(corrgram)
corrgram(dat[,c(1,6,8,19,21,24,30)], lower.panel=panel.shade, upper.panel=panel.ellipse,
diag.panel=panel.density)
# No distinguishable pattern aside from the decline in abundance over time
Want to start by fitting a full model including interaction
#Try a fit that includes sea otter density and date, including an interaction
fit1 <- glm(abundance ~ avg_density*juli_date*year, family = binomial(link = logit), data = dat)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit1)
##
## Call:
## glm(formula = abundance ~ avg_density * juli_date * year, family = binomial(link = logit),
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.42287 -0.06052 0.00000 0.34396 1.39514
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.106e+01 4.868e+00 2.273 0.0230 *
## avg_density -8.807e-01 8.876e-01 -0.992 0.3211
## juli_date -6.153e-02 2.669e-02 -2.305 0.0212 *
## year2019 9.894e+01 4.095e+03 0.024 0.9807
## avg_density:juli_date 4.925e-03 4.543e-03 1.084 0.2783
## avg_density:year2019 -1.200e+04 3.155e+05 -0.038 0.9697
## juli_date:year2019 -6.855e-01 2.939e+01 -0.023 0.9814
## avg_density:juli_date:year2019 1.052e+02 2.766e+03 0.038 0.9697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 17.751 on 32 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 33.751
##
## Number of Fisher Scoring iterations: 25
# try without interaction (not significant)
fit1.red <- glm(abundance ~ avg_density+juli_date+year, family = binomial(link = logit), data = dat)
summary(fit1.red)
##
## Call:
## glm(formula = abundance ~ avg_density + juli_date + year, family = binomial(link = logit),
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2401 -0.5612 0.4685 0.6753 1.4344
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.00802 2.56731 2.340 0.0193 *
## avg_density 0.13662 0.10609 1.288 0.1978
## juli_date -0.03516 0.01431 -2.456 0.0140 *
## year2019 0.14592 0.99396 0.147 0.8833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 36.306 on 36 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 44.306
##
## Number of Fisher Scoring iterations: 5
fit1.time <- glm(abundance ~ juli_date+year, family = binomial(link = logit), data = dat[-41,])
summary(fit1.time)
##
## Call:
## glm(formula = abundance ~ juli_date + year, family = binomial(link = logit),
## data = dat[-41, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3462 -0.7185 0.5535 0.6557 1.6963
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.71661 2.45065 2.333 0.0197 *
## juli_date -0.03115 0.01325 -2.351 0.0187 *
## year2019 0.36545 0.95053 0.384 0.7006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 38.849 on 37 degrees of freedom
## AIC: 44.849
##
## Number of Fisher Scoring iterations: 4
# compare AIC values and Analysis of Deviance
AIC(fit1, fit1.red, fit1.time) # reduced has slightly lower AIC value
## df AIC
## fit1 8 33.75077
## fit1.red 4 44.30606
## fit1.time 3 44.84905
anova(fit1, fit1.red, fit1.time, test = "Chisq") # the null hypothesis is rejected indicating
## Analysis of Deviance Table
##
## Model 1: abundance ~ avg_density * juli_date * year
## Model 2: abundance ~ avg_density + juli_date + year
## Model 3: abundance ~ juli_date + year
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 32 17.751
## 2 36 36.306 -4 -18.555 0.0009609 ***
## 3 37 38.849 -1 -2.543 0.1107849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# that the true model is the second model that includes avg_density, juli_date, and y ear
anova(fit1, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: abundance
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 39 50.446
## avg_density 1 1.8074 38 48.639 0.1788189
## juli_date 1 12.3115 37 36.328 0.0004502 ***
## year 1 0.0215 36 36.306 0.8834323
## avg_density:juli_date 1 0.0505 35 36.256 0.8221146
## avg_density:year 1 3.2129 34 33.043 0.0730593 .
## juli_date:year 1 2.6951 33 30.348 0.1006574
## avg_density:juli_date:year 1 12.5967 32 17.751 0.0003864 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Try another fit using seagrass densities and date
# If the seagrass and sea otter have a hypothesized relationship (based on WR work) they should
# be in included separately.
fit2.intx <- glm(abundance ~ avg_shoot * avg_flowering * juli_date * year, family= binomial(link = logit), data = dat[-41,])
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit2.intx)
##
## Call:
## glm(formula = abundance ~ avg_shoot * avg_flowering * juli_date *
## year, family = binomial(link = logit), data = dat[-41, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.04365 -0.00008 0.00000 0.17662 1.65162
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 5.684e+00 2.435e+01 0.233
## avg_shoot 1.941e-02 1.134e-01 0.171
## avg_flowering -2.133e+01 2.978e+01 -0.716
## juli_date -2.263e-02 1.631e-01 -0.139
## year2019 2.480e+02 3.419e+05 0.001
## avg_shoot:avg_flowering 9.299e-02 1.472e-01 0.632
## avg_shoot:juli_date -1.745e-04 7.905e-04 -0.221
## avg_flowering:juli_date 1.322e-01 1.889e-01 0.700
## avg_shoot:year2019 -3.244e+00 1.820e+03 -0.002
## avg_flowering:year2019 1.130e+03 2.300e+05 0.005
## juli_date:year2019 -2.588e+00 2.372e+03 -0.001
## avg_shoot:avg_flowering:juli_date -5.674e-04 9.369e-04 -0.606
## avg_shoot:avg_flowering:year2019 -1.197e-01 5.687e+02 0.000
## avg_shoot:juli_date:year2019 2.926e-02 1.347e+01 0.002
## avg_flowering:juli_date:year2019 -5.877e+00 1.244e+03 -0.005
## avg_shoot:avg_flowering:juli_date:year2019 -4.250e-03 4.314e+00 -0.001
## Pr(>|z|)
## (Intercept) 0.815
## avg_shoot 0.864
## avg_flowering 0.474
## juli_date 0.890
## year2019 0.999
## avg_shoot:avg_flowering 0.528
## avg_shoot:juli_date 0.825
## avg_flowering:juli_date 0.484
## avg_shoot:year2019 0.999
## avg_flowering:year2019 0.996
## juli_date:year2019 0.999
## avg_shoot:avg_flowering:juli_date 0.545
## avg_shoot:avg_flowering:year2019 1.000
## avg_shoot:juli_date:year2019 0.998
## avg_flowering:juli_date:year2019 0.996
## avg_shoot:avg_flowering:juli_date:year2019 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 15.079 on 24 degrees of freedom
## AIC: 47.079
##
## Number of Fisher Scoring iterations: 23
# drop the interactions, not significant
fit2 <- glm(abundance ~ avg_shoot + avg_flowering + juli_date + year, family = binomial(link = logit), data = dat[-41,])
summary(fit2)
##
## Call:
## glm(formula = abundance ~ avg_shoot + avg_flowering + juli_date +
## year, family = binomial(link = logit), data = dat[-41, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3329 -0.7028 0.5193 0.6456 1.6576
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.939517 2.477246 2.398 0.0165 *
## avg_shoot -0.001229 0.001853 -0.663 0.5072
## avg_flowering 0.052508 0.204545 0.257 0.7974
## juli_date -0.031212 0.013522 -2.308 0.0210 *
## year2019 0.625118 1.067014 0.586 0.5580
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 38.340 on 35 degrees of freedom
## AIC: 48.34
##
## Number of Fisher Scoring iterations: 4
# looks like julian date is important....
# lets drop the other non significant values and see
fit2.red <- glm(abundance ~ juli_date + year, family = binomial(link = logit), data = dat[-41,])
summary(fit2.red)
##
## Call:
## glm(formula = abundance ~ juli_date + year, family = binomial(link = logit),
## data = dat[-41, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3462 -0.7185 0.5535 0.6557 1.6963
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.71661 2.45065 2.333 0.0197 *
## juli_date -0.03115 0.01325 -2.351 0.0187 *
## year2019 0.36545 0.95053 0.384 0.7006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50.446 on 39 degrees of freedom
## Residual deviance: 38.849 on 37 degrees of freedom
## AIC: 44.849
##
## Number of Fisher Scoring iterations: 4
anova(fit2.intx, fit2, fit2.red, test= "Chisq")
## Analysis of Deviance Table
##
## Model 1: abundance ~ avg_shoot * avg_flowering * juli_date * year
## Model 2: abundance ~ avg_shoot + avg_flowering + juli_date + year
## Model 3: abundance ~ juli_date + year
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 24 15.079
## 2 35 38.340 -11 -23.2603 0.01624 *
## 3 37 38.849 -2 -0.5094 0.77513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(fit2.intx, fit2, fit2.red) # fit 2 reduced only reduces the AIC values by 3...
## df AIC
## fit2.intx 16 47.07934
## fit2 5 48.33961
## fit2.red 3 44.84905
anova(fit2.intx, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: abundance
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 39 50.446
## avg_shoot 1 0.0266 38 50.420
## avg_flowering 1 0.0862 37 50.334
## juli_date 1 11.6468 36 38.687
## year 1 0.3473 35 38.340
## avg_shoot:avg_flowering 1 4.2249 34 34.115
## avg_shoot:juli_date 1 0.0526 33 34.062
## avg_flowering:juli_date 1 0.0820 32 33.980
## avg_shoot:year 1 1.4187 31 32.561
## avg_flowering:year 1 0.2359 30 32.325
## juli_date:year 1 2.0588 29 30.267
## avg_shoot:avg_flowering:juli_date 1 0.0158 28 30.251
## avg_shoot:avg_flowering:year 1 0.2603 27 29.991
## avg_shoot:juli_date:year 1 2.7433 26 27.247
## avg_flowering:juli_date:year 1 12.1679 25 15.079
## avg_shoot:avg_flowering:juli_date:year 1 0.0000 24 15.079
## Pr(>Chi)
## NULL
## avg_shoot 0.8705576
## avg_flowering 0.7690317
## juli_date 0.0006431 ***
## year 0.5556475
## avg_shoot:avg_flowering 0.0398348 *
## avg_shoot:juli_date 0.8185266
## avg_flowering:juli_date 0.7746074
## avg_shoot:year 0.2336206
## avg_flowering:year 0.6271743
## juli_date:year 0.1513321
## avg_shoot:avg_flowering:juli_date 0.8998829
## avg_shoot:avg_flowering:year 0.6099220
## avg_shoot:juli_date:year 0.0976606 .
## avg_flowering:juli_date:year 0.0004862 ***
## avg_shoot:avg_flowering:juli_date:year 1.0000000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# How do we look at these two models with different parameters if the models aren't nested?
visreg(fit1.red, scale = "response", overlapy = T, gg=T)
## [[1]]
##
## [[2]]
##
## [[3]]
visreg(fit2, scale = "response", overlay = T, gg = T)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# diagnostic plots taken with a grain of salt for binomial models
par(mfrow=c(2,2))
plot(fit1.red, which=1:4)
# potential outliers with large influence based on Cook's distance
dat[4,] # Kaguk (high density, seined in july)
## Source: local data frame [1 x 30]
## Groups: <by row>
##
## # A tibble: 1 x 30
## site n_surv1 n_surv2 dens_surv1 dens_surv2 avg_density sd_density year
## <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 Kagu~ 104 82 15.5 12.2 13.8 2.31 2017
## # ... with 22 more variables: dissolved_02_mg.l_surface <dbl>,
## # dissolved_02_percent_surface <dbl>,
## # specific_conductivity_surface <dbl>, salinity_ppt_surface <dbl>,
## # temperature_C_surface <dbl>, dissolved_02_mg.l_transect <dbl>,
## # dissolved_02_percent_transect <dbl>,
## # specific_conductivity_transect <int>, salinity_ppt_transect <dbl>,
## # temperature_C_transect <dbl>, avg_shoot <dbl>, sd_shoot <dbl>,
## # avg_flowering <dbl>, sd_flowering <dbl>, date <fct>, juli_date <int>,
## # SALCHIN <dbl>, SALCHUM <dbl>, SALCOHO <dbl>, SALPINK <dbl>,
## # SALSOCK <dbl>, abundance <dbl>
dat[6,] # Guktu (high, density, seined in August)
## Source: local data frame [1 x 30]
## Groups: <by row>
##
## # A tibble: 1 x 30
## site n_surv1 n_surv2 dens_surv1 dens_surv2 avg_density sd_density year
## <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 Gukt~ 108 132 12.3 15.0 13.7 1.93 2017
## # ... with 22 more variables: dissolved_02_mg.l_surface <dbl>,
## # dissolved_02_percent_surface <dbl>,
## # specific_conductivity_surface <dbl>, salinity_ppt_surface <dbl>,
## # temperature_C_surface <dbl>, dissolved_02_mg.l_transect <dbl>,
## # dissolved_02_percent_transect <dbl>,
## # specific_conductivity_transect <int>, salinity_ppt_transect <dbl>,
## # temperature_C_transect <dbl>, avg_shoot <dbl>, sd_shoot <dbl>,
## # avg_flowering <dbl>, sd_flowering <dbl>, date <fct>, juli_date <int>,
## # SALCHIN <dbl>, SALCHUM <dbl>, SALCOHO <dbl>, SALPINK <dbl>,
## # SALSOCK <dbl>, abundance <dbl>
# Try and refit the model (fit1.red) without these outliers
fit1.red.up <- glm(abundance ~ avg_density+juli_date+year, family = binomial(link = logit), data = dat[-c(4,6),])
summary(fit1.red.up)
##
## Call:
## glm(formula = abundance ~ avg_density + juli_date + year, family = binomial(link = logit),
## data = dat[-c(4, 6), ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2173 -0.4889 0.3298 0.6651 1.4967
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.41044 2.81176 2.280 0.0226 *
## avg_density 0.29214 0.23605 1.238 0.2159
## juli_date -0.03813 0.01563 -2.439 0.0147 *
## year2019 -0.10476 1.06320 -0.099 0.9215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47.398 on 37 degrees of freedom
## Residual deviance: 31.438 on 34 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 39.438
##
## Number of Fisher Scoring iterations: 6
visreg(fit1.red.up, scale = "response", overlap = T, gg=T)
## [[1]]
##
## [[2]]
##
## [[3]]
par(mfrow=c(2,2))
plot(fit1.red.up, which=1:4) # does this look better?
Fit the data to a model using abundances
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
##
## Call:
## glm(formula = log(abundance + 1) ~ dens_surv1 + dens_surv2 +
## year + juli_date + avg_shoot + avg_flowering + dissolved_02_mg.l_surface +
## salinity_ppt_surface + temperature_C_surface + dissolved_02_mg.l_transect +
## salinity_ppt_transect + temperature_C_transect, family = poisson,
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.14938 -0.47115 0.01652 0.27558 0.72588
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5483851 7.6239328 0.203 0.839
## dens_surv1 0.0327237 0.0925717 0.353 0.724
## dens_surv2 0.0058663 0.0451706 0.130 0.897
## year2019 0.0099484 0.9017173 0.011 0.991
## juli_date -0.0141727 0.0151322 -0.937 0.349
## avg_shoot -0.0001681 0.0013338 -0.126 0.900
## avg_flowering 0.0463769 0.1266167 0.366 0.714
## dissolved_02_mg.l_surface -0.0631941 0.3260231 -0.194 0.846
## salinity_ppt_surface 0.0106196 0.0693024 0.153 0.878
## temperature_C_surface 0.2296211 0.5531599 0.415 0.678
## dissolved_02_mg.l_transect 0.0058499 0.0161033 0.363 0.716
## salinity_ppt_transect 0.0149432 0.2101998 0.071 0.943
## temperature_C_transect -0.2766764 0.5495297 -0.503 0.615
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 14.614 on 38 degrees of freedom
## Residual deviance: 10.397 on 26 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: Inf
##
## Number of Fisher Scoring iterations: 5
##
## Call:
## glm(formula = log(abundance + 1) ~ avg_density + year + juli_date +
## avg_shoot + avg_flowering + dissolved_02_mg.l_surface + salinity_ppt_surface +
## temperature_C_surface + dissolved_02_mg.l_transect + salinity_ppt_transect +
## temperature_C_transect, family = quasipoisson, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.13508 -0.49083 0.01386 0.26941 0.71660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0795717 3.7326437 0.557 0.582
## avg_density 0.0271487 0.0277611 0.978 0.337
## year2019 0.0317323 0.4611927 0.069 0.946
## juli_date -0.0140974 0.0078301 -1.800 0.083 .
## avg_shoot -0.0002092 0.0006897 -0.303 0.764
## avg_flowering 0.0450778 0.0657033 0.686 0.499
## dissolved_02_mg.l_surface -0.0803180 0.1656396 -0.485 0.632
## salinity_ppt_surface 0.0083765 0.0355478 0.236 0.815
## temperature_C_surface 0.2163638 0.2824358 0.766 0.450
## dissolved_02_mg.l_transect 0.0052267 0.0081826 0.639 0.528
## salinity_ppt_transect 0.0106419 0.1081765 0.098 0.922
## temperature_C_transect -0.2738793 0.2832244 -0.967 0.342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.2684921)
##
## Null deviance: 14.614 on 38 degrees of freedom
## Residual deviance: 10.443 on 27 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
## Warning in sqrt(diag(vc_count)[kx + 1]): NaNs produced